In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error as mse
In [2]:
#Load the sensors dataset
sensor = pd.read_csv('sensor_updated.csv')
sensor.info()
sensor.head()
Out[2]:
In [3]:
#Load the occupancy dataset
occupancy = pd.read_csv('image_variations.csv')
occupancy.info()
occupancy.head()
Out[3]:
In [4]:
#Merge the two datasets by datetime
df = pd.merge(sensor, occupancy[['datetime','rolling_rms']], on='datetime', how='inner')
df.info()
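An inner join silently drops rows whose timestamps appear in only one file, so it can help to check how many rows survive. A small sketch of that sanity check, using only the frames already loaded above:
In [ ]:
#Compare row counts before and after the inner join; a large drop would
#suggest the datetime keys do not line up between the two files
print(len(sensor), len(occupancy), len(df))
print(df['datetime'].duplicated().sum(), "duplicate timestamps after merge")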
In [5]:
#Drop rows where the temperature reading is missing
df = df[pd.notnull(df['temperature'])]
In [6]:
#Round the rolling_rms feature to integer
df.rolling_rms = df.rolling_rms.round()
In [7]:
#Drop the datetime feature
df = df.drop('datetime', axis=1)
In [8]:
#Confirm no NaN values remain (expect empty index arrays)
np.where(np.isnan(df))
Out[8]:
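A per-column count of missing values is an easier-to-read view of the same information; this is just an alternative check on the frame built above:
In [ ]:
#Count missing values per column; every entry should be zero at this point
print(df.isnull().sum())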
In [9]:
#Visualize pairwise relationships between the features
pd.plotting.scatter_matrix(df, alpha=0.2, figsize=(18, 18), diagonal='kde')
plt.show()
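A scatter matrix of this size can be hard to read; a numeric correlation matrix is a compact complement (a sketch using only what is already in df):
In [ ]:
#Pairwise Pearson correlations between the sensor features and rolling_rms
print(df.corr())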
In [10]:
#Features are all columns but the last; the label is rolling_rms
df_features = df.iloc[:, 0:-1]
df_labels = df.iloc[:, -1]
In [11]:
#Hold out 20% of the data for testing
splits = tts(df_features, df_labels, test_size=0.2)
X_train, X_test, y_train, y_test = splits
In [12]:
model = Ridge(alpha=0.1)
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
print("Ridge Regression model")
print("Mean Squared Error: %0.3f" % mse(expected, predicted))
print("Coefficient of Determination: %0.3f" % r2_score(expected, predicted))
In [13]:
model = RandomForestRegressor()
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
print("Random Forest model")
print("Mean squared error = %0.3f" % mse(expected, predicted))
print("R2 score = %0.3f" % r2_score(expected, predicted))
In [14]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
In [15]:
df.describe()
Out[15]:
In [16]:
#Bin rolling_rms into three occupancy classes
#(named label_occupancy to avoid shadowing the occupancy DataFrame)
def label_occupancy(c):
    if c['rolling_rms'] < 10:
        return '1'
    elif c['rolling_rms'] > 20:
        return '3'
    else:
        return '2'
df['occupancy'] = df.apply(label_occupancy, axis=1)
df = df.drop('rolling_rms', axis=1)
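Before training classifiers it is worth checking how the thresholds (10 and 20) distribute the rows across the three classes, since heavily imbalanced classes would make plain accuracy misleading:
In [ ]:
#Distribution of rows across the three occupancy classes
print(df['occupancy'].value_counts())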
In [17]:
#Features are the sensor readings; the target is the new occupancy class
data = df.iloc[:, 0:-1]
target = df.iloc[:, -1]
In [18]:
#Re-split on the occupancy classes (the earlier split targeted rolling_rms)
X_train, X_test, y_train, y_test = tts(data, target, test_size=0.2)
model = SVC()
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
accuracy = metrics.accuracy_score(expected, predicted)
print("SVM Classifier")
print("Accuracy = %0.3f" % accuracy)
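SVC is sensitive to feature scale, and the raw sensor readings live on very different ranges, so standardizing often helps. A sketch of a scaled variant, not tuned:
In [ ]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

#Standardize the features before the SVM; fit on train, apply to test
scaled_svc = make_pipeline(StandardScaler(), SVC())
scaled_svc.fit(X_train, y_train)
print("Scaled SVM accuracy = %0.3f"
      % metrics.accuracy_score(y_test, scaled_svc.predict(X_test)))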
In [19]:
model = KNeighborsClassifier()
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
accuracy = metrics.accuracy_score(expected, predicted)
print("K Neighbors Classifier")
print("Accuracy = %0.3f" % accuracy)
In [20]:
model = RandomForestClassifier()
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
accuracy = metrics.accuracy_score(expected, predicted)
print("Random Forest Classifier")
print("Accuracy = %0.3f" % accuracy)
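Accuracy alone hides per-class behavior; a classification report and confusion matrix show where the three classes get confused (a sketch using the predictions from the random forest cell above):
In [ ]:
#Per-class precision/recall and the confusion matrix for the random forest
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))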
In [ ]: